import pyopencl as cl
import numpy as np
import math

# -------------------------------
# Constants
# -------------------------------
PHI: float = 1.6180339887  # golden ratio, truncated to 10 decimal places

# Lattice geometry
D_TOTAL: int = 4096  # total lattice slots
INSTANCES: int = 8  # HDGL binary instances
SLOTS_PER: int = 32  # slots per instance

# Run control and display
EVOLUTION_TICKS: int = 100  # number of kernel launches in the evolution loop
CONSOLE_WIDTH: int = 8  # trailing lattice slots shown in the simple console view

# -------------------------------
# OpenCL Setup
# -------------------------------
# Enumerate every GPU exposed by any installed OpenCL platform.
platforms = cl.get_platforms()
gpu_devices = [d for p in platforms for d in p.get_devices(cl.device_type.GPU)]

# Prefer a GPU, but fall back to any OpenCL device (e.g. a CPU runtime) so the
# script still runs on machines without one.
candidate_devices = gpu_devices or [d for p in platforms for d in p.get_devices()]
if not candidate_devices:
    raise RuntimeError("No OpenCL devices found on any platform")

# An OpenCL context can only hold devices that belong to a single platform,
# and the command queue below targets one device anyway, so bind the context
# to exactly one device instead of to every GPU found across all platforms.
device = candidate_devices[0]
ctx = cl.Context(devices=[device])
queue = cl.CommandQueue(ctx, device)

# -------------------------------
# Allocate lattice and workspace
# -------------------------------
# Host-side image of the whole lattice; every slot starts at zero.
lattice_host = np.zeros(D_TOTAL, dtype=np.float32)

# int32 index tables naming the lattice slots each region occupies:
#   control   -> the first INSTANCES * SLOTS_PER slots
#   workspace -> the same number of slots, starting at slot 1024
#   console   -> the final CONSOLE_WIDTH slots of the lattice
_region_size = INSTANCES * SLOTS_PER
_workspace_base = 1024
slots_control = np.arange(_region_size, dtype=np.int32)
slots_workspace = np.arange(_workspace_base, _workspace_base + _region_size, dtype=np.int32)
slots_console = np.arange(D_TOTAL - CONSOLE_WIDTH, D_TOTAL, dtype=np.int32)

# Device buffers, each initialised from its host array at creation time.
# The lattice is read-write; the two index tables are read-only on the device.
mf = cl.mem_flags
_read_only_copy = mf.READ_ONLY | mf.COPY_HOST_PTR
lattice_buf = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=lattice_host)
slots_control_buf = cl.Buffer(ctx, _read_only_copy, hostbuf=slots_control)
slots_workspace_buf = cl.Buffer(ctx, _read_only_copy, hostbuf=slots_workspace)

# -------------------------------
# OpenCL Kernel (GPU-agnostic)
# -------------------------------
# OpenCL C source for the single kernel `lattice_evolve`. Each work-item with
# global id `gid` does up to two things:
#   * if gid < n_control: adds half of (left neighbour - right neighbour) to
#     lattice[slots_control[gid]], in place. At lattice index 0 the "left"
#     neighbour is the slot itself, and likewise for "right" at D_TOTAL-1.
#   * if gid < n_workspace: overwrites lattice[slots_workspace[gid]] with 1.0
#     when its value is >= sqrt(phi), otherwise with 0.0.
# NOTE(review): the control update reads neighbouring slots that other
# work-items may be writing during the same launch, and nothing in the kernel
# orders those accesses — confirm that an in-place update is intended.
kernel_source = """
__kernel void lattice_evolve(__global float *lattice,
                             const int D_TOTAL,
                             __global const int *slots_control,
                             const int n_control,
                             __global const int *slots_workspace,
                             const int n_workspace,
                             const float phi) {

    int gid = get_global_id(0);

    // Control evolution
    if (gid < n_control) {
        int idx = slots_control[gid];
        int left = (idx == 0) ? idx : idx-1;
        int right = (idx == D_TOTAL-1) ? idx : idx+1;
        lattice[idx] += 0.5f * (lattice[left] - lattice[right]);
    }

    // Workspace threshold projection
    if (gid < n_workspace) {
        int w_idx = slots_workspace[gid];
        lattice[w_idx] = (lattice[w_idx] >= sqrt(phi)) ? 1.0f : 0.0f;
    }
}
"""

# Build the program in the context created above and look the kernel up by name.
program = cl.Program(ctx, kernel_source).build()
kernel = cl.Kernel(program, "lattice_evolve")

# -------------------------------
# Inject a random snapshot (values in [0, 2)) into the workspace slots
# -------------------------------
# One uniform random value in [0, 2) per workspace slot. The kernel thresholds
# at sqrt(phi), so values below that will be projected to 0.0 on the first tick.
_snapshot_len = len(slots_workspace)
kernel_snapshot = (np.random.rand(_snapshot_len) * 2.0).astype(np.float32)

# Write the snapshot into the device lattice starting at slot 1024.
# The offset is given in bytes: 4 bytes per float32 element.
_workspace_byte_offset = 1024 * 4
cl.enqueue_copy(queue, lattice_buf, kernel_snapshot, dst_offset=_workspace_byte_offset)

# -------------------------------
# Evolution Loop
# -------------------------------
# The kernel arguments are identical on every tick, so bind them once here
# instead of re-binding inside the loop.
kernel.set_args(
    lattice_buf,
    np.int32(D_TOTAL),
    slots_control_buf, np.int32(len(slots_control)),
    slots_workspace_buf, np.int32(len(slots_workspace)),
    np.float32(PHI),
)

# The kernel guards each region with `gid < n_control` / `gid < n_workspace`,
# so launch enough work-items for the longer of the two index tables; sizing
# by the workspace alone would skip control entries if that table were longer.
global_size = (max(len(slots_control), len(slots_workspace)),)

for tick in range(EVOLUTION_TICKS):
    cl.enqueue_nd_range_kernel(queue, kernel, global_size, None)

    # Read back the lattice and render the console slots every 20 ticks:
    # '#' for a positive value, '.' otherwise.
    if tick % 20 == 0:
        cl.enqueue_copy(queue, lattice_host, lattice_buf)
        console_out = ''.join('#' if cell > 0 else '.' for cell in lattice_host[slots_console])
        print(f"[Tick {tick}] Console: {console_out}")

# -------------------------------
# Final Lattice Snapshot
# -------------------------------
# Pull the final lattice state back to the host before reporting.
cl.enqueue_copy(queue, lattice_host, lattice_buf)
print("HDGL-native Debian Bootstrap Complete (GPU-agnostic OpenCL)")
print("Control + first 16 workspace slots:")
# The workspace region does not directly follow the control region in the
# lattice, so a single leading slice would show unrelated slots. Gather both
# regions through their index tables so the output matches the label.
control_values = lattice_host[slots_control]
workspace_head = lattice_host[slots_workspace[:16]]
print(np.concatenate((control_values, workspace_head)))
